
// Global switches to turn on/off sections of the do file
global Data_Electric_section1 					"1" // take large csv file and break them into manageable xlsx's. name vars. 
global Data_Electric_section2_prelim 			"1" // always turn it on for section 2 cleaning of raw rates into panel format. 
global Data_Electric_section2_flatrate 			"1" // flat rate cleaning. follows section2_prelim, which has to be turned on for this section to run. 
global section2_manual		"1" // add missing flat rate manually to data cleaned earlier. 
global section2_manual_IOUs	"1" // manually enter SDGE rates. 

global Data_Electric_section2_EVrate 			"1"


// merge all data
global Data_Electric_section3 			"1" // needs all macros turned on
global Data_Electric_section4 			"1" 

// section1: take the list of EIAIDs from EIA 861
// note: we kept 37 utilities as defined by EIAIDs.
if $Data_Electric_section1 == 1 {

	import excel "${ElectricityRates}/Utility_Data_2017.xlsx", sheet("States") cellrange(A2) firstrow clear
	keep if State == "CA"
	
	rename UtilityNumber eiaid
	preserve
		keep eiaid
		tempfile eiaid_list
		save `eiaid_list', replace
	restore
	tempfile eia861
	save `eia861', replace

	//fix variable names 
	import excel  "${ElectricityRates}/usurdb.xlsx", cellrange(A1:SW1) clear
	foreach var of varlist AB-DM {
		replace `var' = subinstr(`var', "flatdemandstructure", "FDS", .)
		replace `var' = subinstr(`var', "period", "p", .)
		replace `var' = subinstr(`var', "/", "_", .)
		replace `var' = subinstr(`var', "tier", "t", .)
	}
	
	foreach var of varlist EA-JB {
		replace `var' = subinstr(`var', "demandratestructure", "DRS", .)
		replace `var' = subinstr(`var', "period", "p", .)
		replace `var' = subinstr(`var', "/", "_", .)
		replace `var' = subinstr(`var', "tier", "t", .)
	}
	
	foreach var of varlist JJ-SC {
		replace `var' = subinstr(`var', "energyratestructure", "ERS", .)
		replace `var' = subinstr(`var', "period", "p", .)
		replace `var' = subinstr(`var', "/", "_", .)
		replace `var' = subinstr(`var', "tier", "t", .)
	}
	foreach var of varlist SM-SU {
		replace `var' = subinstr(`var', "coincidentratestructure", "CRS", .)
		replace `var' = subinstr(`var', "period", "p", .)
		replace `var' = subinstr(`var', "/", "_", .)
		replace `var' = subinstr(`var', "tier", "t", .)
	}
	gen sortvar = 0
	drop B
	tempfile append
	save `append', replace
	
	// append to data
	import excel  "${ElectricityRates}/usurdb_new.xlsx",  clear
	gen sortvar = 1
	gen n = _n
	drop if n == 1
	drop n
	ren B eiaid
	destring eiaid, replace
	
	merge m:1 eiaid using `eiaid_list'
	
	keep if _merge == 3 
	drop _merge	
	
	append using `append'
	sort sortvar
	drop sortvar
	export excel using "${ElectricityRates}/usurdb_modified.xlsx", replace
}

if $Data_Electric_section2_prelim == 1 {
	// import dataset with right varnames, clean.
	import excel  "${ElectricityRates}/usurdb_modified.xlsx", firstrow clear
	
	ren B eiaid
	
	// keep residential rates only.
	keep if sector == "Residential"
	// break down the rate/tier schedule vectors.
	// These vector hold schedule type info by month and hour of day. we essentially break these into categorical variables, then match to rate info.
	ren energyweekdayschedule ERSWkdaySch
	ren energyweekendschedule ERSWkendSch
	foreach var in ERSWkdaySch ERSWkendSch {
		replace `var' = subinstr(`var', "[[", "[", .)
		replace `var' = subinstr(`var', "]]", "]", .)
		split `var', p("], [") generate(`var'M)
		forvalues i = 1/12 {
			replace `var'M`i' = subinstr(`var'M`i', "]", "", .)
			replace `var'M`i' = subinstr(`var'M`i', "[", "", .)
			replace `var'M`i' = subinstr(`var'M`i', " ", "", .)
			split `var'M`i', p(",") generate(`var'M`i'H)
			drop `var'M`i'
			forvalues j = 1/24 {
				destring `var'M`i'H`j', replace
				local j_1 = `j' - 1
				ren `var'M`i'H`j' `var'H`j_1'M`i'
				tab `var'H`j_1'M`i' if `var'H`j_1'M`i' == ., mi 
			}
		}
	}
	
	drop ERSWkdaySch ERSWkendSch
	foreach var of varlist ERS_p0_t0max-ERS_p23_t0sell {
		cap destring `var', replace force
	}
	// keep relevant variables for the ERS section.
	
	destring fixedchargefirstmeter, replace force
	replace fixedchargefirstmeter =  fixedchargefirstmeter*30 if fixedchargeunits == "$/day" 
	keep label eiaid name utility startdate enddate supersedes ERS_p0_t0max-ERS_p23_t0sell ERSWkdaySch* ERSWkendSch* lastupdate fixedchargefirstmeter
	sort eiaid name
	order label eiaid utility name startdate enddate supersedes
	
	// clean up date variables, and cut out redundant rate info. 
	replace startdate = trim(startdate)
	replace enddate = trim(enddate)
	
	duplicates tag eiaid utility ERS_p0_t0max-ERS_p23_t0sell ERSWkdaySch* ERSWkendSch*, gen(tag1) // rates that do not substantially change.
	
	sort eiaid utility ERS_p0_t0max-ERS_p23_t0sell ERSWkdaySch* ERSWkendSch*
	
	// 	format tiered rates first, i.e. get rid of EV and ToU rates for now.
	gen ToU = regexm(name, "Time of Use") == 1 | regexm(name, "TOU") == 1 | regexm(name, "ToU") == 1 | regexm(name, "TOD") == 1 | regexm(name, "Time-of-Use") == 1
	gen EV = regexm(name, "Electric Vehicle") == 1 | regexm(name, "EV") == 1 
	
	// drop duplicates within the following category of indicators
	isid label 
	sort eiaid utility EV ToU ERS_p0_t0max-ERS_p23_t0sell ERSWkdaySch* ERSWkendSch* startdate enddate label
	bys eiaid utility EV ToU ERS_p0_t0max-ERS_p23_t0sell ERSWkdaySch* ERSWkendSch* startdate enddate (label): keep if _n == 1
	
	replace name = subinstr(name, "- ", " ", .)
	replace name = subinstr(name, " -", " ", .)
	replace name = subinstr(name, "  ", " ", .)
	replace name = subinstr(name, "  ", " ", .)

	tempfile sec2_prelim
	save `sec2_prelim', replace
	
}	
if $Data_Electric_section2_flatrate == 1 {	
	use `sec2_prelim', clear

	bys eiaid: egen mean_ToU = mean(ToU)
	bys eiaid: egen mean_EV = mean(EV)
	assert mean_ToU != 1
	assert mean_EV != 1
	drop mean_EV mean_ToU
	keep if ToU == 0 & EV == 0
	
	// Check that within tier there is no time variation in base rates. 
	
	forvalues m = 1/12 {
		gen TVarERSWkendSchM`m' = 0
		gen TVarERSWkdaySchM`m' = 0
		forvalues i = 0/22 {
			local j = `i'+1
			replace TVarERSWkendSchM`m' = 1 if ERSWkendSchH`i'M`m' != ERSWkendSchH`j'M`m'
			replace TVarERSWkdaySchM`m' = 1 if ERSWkdaySchH`i'M`m' != ERSWkdaySchH`j'M`m'
		}	
	}
	
	// So construct min, max, mean daily rates for each tier. 
	preserve
		keep label eia startdate enddate lastupdate
		
		forvalues m = 1/12 {
			gen mvar`m' = 1
		}
		forvalues y = 2014/2018 {
			gen yvar`y' =  1
		}
		reshape long mvar, i(label eia startdate enddate yvar*) j(month)
		drop mvar
		reshape long yvar, i(label eia startdate enddate month) j(year)
		drop yvar
		sort label eia startdate enddate lastupdate year month
		order label eia startdate enddate lastupdate year month
		tostring year, replace
		tostring month, replace
		gen refdatestr =  month + "/" + "15" + "/" + year 
		gen refdate = date(refdatestr, "MDY")
		format refdate %td
		foreach var in startdate enddate lastupdate {
			ren `var' `var'_t
			gen `var'_t1 = clock(`var'_t, "MDYhm")
			format `var'_t1 %tc
			gen `var' = dofc(`var'_t1)
			format `var' %td
		}
		// trim panel to applicable ranges.
		drop if refdate < startdate & startdate !=. // too early
		drop if refdate > enddate & enddate != . // too late
		gen pastlastupdate = (enddate == .) & (refdate >  lastupdate)
		keep eiaid label year month pastlastupdate
		destring year, replace
		destring month, replace

		tempfile paneltemplate
		save `paneltemplate', replace
	restore	
	
	summ ERSWkdaySchH*
	

	// create variables indicating the shares of each period indexes within the day
	forvalues i = 0/5 { // tier period number
		forvalues m = 1/12 { // month
			egen ShERSWkdayM`m'_p`i' = anycount(ERSWkdaySchH*M`m'), values(`i')
			replace ShERSWkdayM`m'_p`i' = ShERSWkdayM`m'_p`i' /24
		}
	}
	forvalues m = 1/12 { // month
			egen ShERSWkdayM`m'_summ = rowtotal(ShERSWkdayM`m'_p?)
			
		}
	summ ShERSWkdayM*_summ
	// create daily average rate by each tier for each month.
	// m is month, k is tier, i is period
	
	// first add adjustment to rates
	forvalues k = 0/4 { // tiers within each period index
		forvalues i = 0/5 {
			replace ERS_p`i'_t`k'rate = ERS_p`i'_t`k'rate  + ERS_p`i'_t`k'adj if !mi(ERS_p`i'_t`k'adj)
		}
	}	
	forvalues m = 1/12 {
		forvalues k = 0/4 { // tiers within each period index
			forvalues i = 0/5 {
				gen temprate_m`m'_p`i'_t`k' = ERS_p`i'_t`k'rate * ShERSWkdayM`m'_p`i'
				gen tempmax_m`m'_p`i'_t`k' = ERS_p`i'_t`k'max * ShERSWkdayM`m'_p`i' if !mi(ERS_p`i'_t`k'max)
			} // end period index loop
			// create average rates
			egen ERSWkdayAvgRate_t`k'M`m' = rowtotal(temprate_m`m'_p*_t`k')
			replace ERSWkdayAvgRate_t`k'M`m' = . if ERSWkdayAvgRate_t`k'M`m' == 0
			// average max.
			egen ERSWkdayMax_t`k'M`m' = rowtotal(tempmax_m`m'_p*_t`k') 
			egen rownonmiss_t`k'M`m' = rownonmiss(tempmax_m`m'_p*_t`k') 
			replace ERSWkdayMax_t`k'M`m' = . if rownonmiss_t`k'M`m' == 0			
			drop temprate_m`m'_p?_t`k' rownonmiss_t`k'M`m' 
		} // end tier loop
	} // end month loop
	drop ShERSWkdayM*
	keep label eia name utility startdate enddate supersedes lastupdate ERSWkdayAvgRate_t?M* ERSWkdayMax_t?M* TVarERSWkendSchM* TVarERSWkdaySchM* fixedchargefirstmeter 
	duplicates drop
	
	duplicates tag supersedes, gen(tag)
		br if tag == 1
		drop if label == "539fbda4ec4f024c27d87e07"
	drop tag
	
	reshape long ERSWkdayAvgRate_t0M ERSWkdayAvgRate_t1M ERSWkdayAvgRate_t2M ///
		ERSWkdayAvgRate_t3M ERSWkdayAvgRate_t4M TVarERSWkdaySchM ERSWkdayMax_t0M ///
		ERSWkdayMax_t1M ERSWkdayMax_t2M ERSWkdayMax_t3M ERSWkdayMax_t4M, ///
		i(label utility eia name lastupdate startdate enddate supersedes fixedchargefirstmeter) j(month)
	
	merge 1:m label month using `paneltemplate'	
	keep if _merge == 3
	drop _merge
	sort eiaid label year month
	order eiaid label year month
	duplicates tag supersedes year month if supersedes != "", gen(tag2)
	assert (tag2 == 0 | tag == .)
	drop tag2
	// save the cleaned files so far
	tempfile saved1
	save `saved1', replace
		
	// keep which obs to keep for each EIAID.
	summ TVarERSWkdaySchM
		
		/*Run a series of merge and delete commands to create a wide  dataset 
		where i is a new identifier for the string of labels, j is the order of 
		those strings chronologically. Start with the most recent, then work back in time.
		*/
		keep label supersedes
		duplicates drop

		ren supersedes label0
		ren label label1
		
		preserve
			keep if label0 != ""
			ren label1 label2
			ren label0 label1
			tempfile supersedes_a
			save `supersedes_a', replace
		restore
		merge 1:1 label1 using `supersedes_a'
		/* _merge results:
		1. means the label is not followed by another label.
		2. means the supersedes variable value was not in our rate sheet
		3. means the supersedes variable value was in our rate sheet.
		*/
		order label0 label1 label2
		
		sort _merge label0 label1 label2
		gen label_unused = label1 != "" & _merge == 2
		
		// omit obs's that are non existing superseding labels from the list of strings to be merged onto again.
		levelsof label1 if _merge == 2, local(match_a)
		gen initialstring = 0 // within matched obs, identify ones starting with a label (0) that do not map to others in label1
		foreach num of local match_a {
			replace initialstring = 1 if label0 == "`num'" 
		}	
		sort _merge initialstring label0 label1 label2
		
		// create indicators for values dropped
		* drop observations whose preceding label was matched to the existing list of labels. 
		gen drop_end = 0 
		levelsof label1 if _merge == 3, local(match_b)
		foreach num of local match_b {
			replace drop_end = 1 if label0 == "`num'" & _merge == 1
		}	
		drop if drop_end == 1
		
		// drop obs whose superseding labels did not match.
		drop if _merge == 2
		drop drop_end label_unused
		
		// create indicators for whether the beginning and the end of the series are identified for each row
		// The objective is to set these dummies to one for all obs.
		gen begin_series = 0
		replace begin_series = 1 if initialstring == 1
		replace begin_series = 1 if label0 == "" // this is essentially is if label0 == 0, no superseding label, so the begging of a series
		
		gen end_series = label2 == "" // i.e. not matched  in the initial merge
		* drop _merge
		order begin_series end_series initialstring label0 label1 label2
		
		preserve
			keep if _merge == 3 & begin_series == 0
			ren label2 label3
			ren label1 label2
			ren label0 label1
			keep label1 label2 label3
			tempfile supersedes_b
			save `supersedes_b', replace
		restore
		// drop the series obs that we are now merging
		drop if _merge == 3 & begin_series == 0
		merge m:1 label1 label2 using `supersedes_b', gen(_merge2)
		order begin_series end_series initialstring label0 label1 label2 label3 _merge _merge2

		sort _merge _merge2 begin_series end_series initialstring label0 label1 label2 label3
		assert end_series == 0 if _merge == 3 & _merge2 == 3
		replace end_series = 1 if _merge == 3 & _merge2 == 1
		
		// iterate
		preserve
			keep if _merge2 == 2
			ren label3 label4
			ren label2 label3
			ren label1 label2
			keep label2 label3 label4
			tempfile supersedes_c
			save `supersedes_c', replace
		restore
		drop if _merge2 == 2
		merge m:1 label2 label3 using `supersedes_c', gen(_merge3)

		order begin_series end_series initialstring label0 label1 label2 label3 label4 _merge _merge2 _merge3

		sort _merge _merge2 _merge3 begin_series end_series initialstring label0 label1 label2 label3 label4
		
		assert end_series == 0 if _merge2 == 3 & _merge3 == 3
		replace end_series = 1 if _merge2 == 3 & _merge3 == 1
		
		
		
		// iterate
		preserve
			keep if _merge3 == 2
			ren label4 label5
			ren label3 label4
			ren label2 label3
			keep label3 label4 label5
			tempfile supersedes_d
			save `supersedes_d', replace
		restore
		drop if _merge3 == 2
		merge m:1 label3 label4 using `supersedes_d', gen(_merge4)

		order begin_series end_series initialstring label0 label1 label2 label3 label4 label5 _merge _merge2 _merge3 _merge4

		sort _merge _merge2 _merge3 _merge4 begin_series end_series initialstring label0 label1 label2 label3 label4 label5
	
		
		assert end_series == 0 if _merge3 == 3 & _merge4 == 3
		replace end_series = 1 if _merge3 == 3 & _merge4 == 1
		
		// iterate
		preserve
			keep if _merge4 == 2
			ren label5 label6
			ren label4 label5
			ren label3 label4
			keep label4 label5 label6
			tempfile supersedes_e
			save `supersedes_e', replace
		restore
		drop if _merge4 == 2
		merge m:1 label4 label5 using `supersedes_e', gen(_merge5)

		order begin_series end_series initialstring label0 label1 label2 label3 label4 label5 label6 _merge _merge2 _merge3 _merge4 _merge5

		sort _merge _merge2 _merge3 _merge4 _merge5 begin_series end_series initialstring label0 label1 label2 label3 label4 label5 label6
	
		
		assert end_series == 0 if _merge4 == 3 & _merge5 == 3
		replace end_series = 1 if _merge4 == 3 & _merge5 == 1
		
		assert initialstring == 1 if !mi(label0)
		gen mostrecent_label = label1
		reshape long label, i(mostrecent_label initialstring) j(order)
		drop if label == ""
		drop if initialstring == 1 & order == 0
		keep label mostrecent_label order
		tempfile mergedlabels
		save `mergedlabels', replace
	
	// merge the merged label series.
	use `saved1', clear
	merge m:1 label using `mergedlabels'
	drop _merge
	/* Identify labels to keep
	
	keep label utility name mostrecent_label startdate enddate order
	duplicates drop
	isid utility label
	reshape wide label name startdate enddate, i(utility mostrecent_label) j(order)
	sort utility name1 startdate1 enddate1 startdate2 enddate2 startdate3 enddate3 startdate4 enddate4
	order mostrecent_label utility name1 startdate1 enddate1 startdate2 enddate2 startdate3 enddate3 startdate4 enddate4
	
	*/
			
	
	// manually code obs that we should keep based on rate names, data availability, and such.
	gen keep_label = 0 
	// Anza
	replace keep_label = 1 if mostrecent_label == "539fba1fec4f024bc1dc2a03"
	// Bear Valley
	replace keep_label = 1 if mostrecent_label == "539f6a46ec4f024411ec8db1"
	// City & County of SF
	replace keep_label = 1 if mostrecent_label == "539f73e4ec4f024411ed01ed"
	// Alameda
	replace keep_label = 1 if mostrecent_label == "539f6f54ec4f024411eccd17"
	// Anaheim
	replace keep_label = 1 if mostrecent_label == "539f6dccec4f024411ecba99" // anaheim, only from 2013, unclear which rate to choose
	// Azusa
	replace keep_label = 1 if mostrecent_label == "539f6aadec4f024411ec9329"
	//Banning
	replace keep_label = 1 if mostrecent_label == "539fc344ec4f024c27d8ba71"
	//Burbank
	replace keep_label = 1 if mostrecent_label == "539f6f77ec4f024411eccea1"
	//Corona
	replace keep_label = 1 if mostrecent_label == "539fca15ec4f024d2f53f85a" 
	//Glendale
	replace keep_label = 1 if mostrecent_label == "539f7327ec4f024411ecf941"
	//Healdsburg
	replace keep_label = 1 if mostrecent_label == "539fcb0eec4f024d2f5401aa"
	// Lodi
	replace keep_label = 1 if mostrecent_label == "539f74d3ec4f024411ed0cc9"
	// Lompoc
	replace keep_label = 1 if mostrecent_label == "539fb3efec4f024bc1dbe3d7"
	// Moreno Valley
	replace keep_label = 1 if mostrecent_label == "539fbd02ec4f024c27d8767f"
	// Palo Alto
	replace keep_label = 1 if mostrecent_label == "539f6a9eec4f024411ec923b"
	// Pasadena
	replace keep_label = 1 if mostrecent_label == "53fce22f5257a37f4ddbd602"
	// Redding
	replace keep_label = 1 if mostrecent_label == "539fba57ec4f024bc1dc2d17"
	// Riverside
	replace keep_label = 1 if mostrecent_label == "539f6d5cec4f024411ecb4c1" 
	// Roseville
	replace keep_label = 1 if mostrecent_label == "539fc38fec4f024c27d8bd83"
	// Santa Clara
	replace keep_label = 1 if mostrecent_label == "539fc15fec4f024c27d8a685"
	// Shasta Lake
	replace keep_label = 1 if mostrecent_label == "539fc3edec4f024c27d8c199"
	// Ukiah
	replace keep_label = 1 if mostrecent_label == "539fc2fbec4f024c27d8b745"
	// Vernon
	replace keep_label = 1 if mostrecent_label == "539f6a63ec4f024411ec8f1d"
	// Imperial Irrigation District (IID)
	replace keep_label = 1 if mostrecent_label == "539f6cfaec4f024411ecb0ab"
	
	// Lassen
	replace keep_label = 1 if mostrecent_label == "5405c21b5257a32f22af34b3"
	replace keep_label = 1 if mostrecent_label == "539f6e4bec4f024411ecc073" // this needs to be merged into one mostrecent_label 5405c21b5257a32f22af34b3
	
	// LADWP
	replace keep_label = 1 if mostrecent_label == "539fb848ec4f024bc1dc15b9" // change to 573a3bfd5457a3a673f19c7a
	replace keep_label = 1 if mostrecent_label == "573a3b5c5457a37859f19c7a" 
	replace keep_label = 1 if mostrecent_label == "539f7506ec4f024411ed0f81" // change to 573a3b5c5457a37859f19c7a
	replace keep_label = 1 if mostrecent_label == "573a3bfd5457a3a673f19c7a"
	//Merced
	replace keep_label = 1 if mostrecent_label == "539fc284ec4f024c27d8b255"
	// Modesto
	replace keep_label = 1 if mostrecent_label == "539f7327ec4f024411ecf95b"
	
	//PGE
	replace keep_label = 1 if mostrecent_label == "539f7337ec4f024411ecf9d9" // 586eaa895457a3131d1c9605
	replace keep_label = 1 if mostrecent_label == "539f6b25ec4f024411ec98c9" // 574c955f5457a367445e629e
	
	replace keep_label = 1 if mostrecent_label == "539f6d3cec4f024411ecb385" // 539f721bec4f024411eced9d
	replace keep_label = 1 if mostrecent_label == "539fcbadec4f024d2f5407fe" // 574c96455457a3c01f5e62a2
	replace keep_label = 1 if mostrecent_label == "539fbd5aec4f024c27d87ac1" // 574c96a45457a30e465e629f
	replace keep_label = 1 if mostrecent_label == "539f6bdcec4f024411eca249" // 574c97215457a3c01f5e62a3
	replace keep_label = 1 if mostrecent_label == "539f6b53ec4f024411ec9b6b" // 574c97955457a34a4b5e629d
	replace keep_label = 1 if mostrecent_label == "539fb824ec4f024bc1dc13eb" // 574c98225457a3874b5e629d
	replace keep_label = 1 if mostrecent_label == "539fb4adec4f024bc1dbebf3" // 574c988c5457a3c01f5e62a4
	replace keep_label = 1 if mostrecent_label == "539f6d4cec4f024411ecb45b" // 574c98f25457a367445e629f
	
	replace keep_label = 1 if mostrecent_label == "5786e1135457a33c6529b8a1" // 586eaa895457a3131d1c9605
	replace keep_label = 1 if mostrecent_label == "586eaa895457a3131d1c9605" 
	replace keep_label = 1 if mostrecent_label == "5550dad15457a3267f8b4568" // 586eaa895457a3131d1c9605
	replace keep_label = 1 if mostrecent_label == "574c955f5457a367445e629e"
	replace keep_label = 1 if mostrecent_label == "574c95d85457a3c01f5e62a1"
	replace keep_label = 1 if mostrecent_label == "574c96455457a3c01f5e62a2"
	replace keep_label = 1 if mostrecent_label == "574c96a45457a30e465e629f"
	replace keep_label = 1 if mostrecent_label == "574c97215457a3c01f5e62a3"
	replace keep_label = 1 if mostrecent_label == "574c97955457a34a4b5e629d"
	replace keep_label = 1 if mostrecent_label == "574c98225457a3874b5e629d"
	replace keep_label = 1 if mostrecent_label == "574c988c5457a3c01f5e62a4"
	replace keep_label = 1 if mostrecent_label == "574c98f25457a367445e629f"
	
	// Plumas-Sierra
	replace keep_label = 1 if mostrecent_label == "542980605257a3fd4a3958fe"
	replace keep_label = 1 if mostrecent_label == "539fb7a9ec4f024bc1dc0e0d"
	
	// SMUD
	replace keep_label = 1 if mostrecent_label == "539fbed7ec4f024c27d88aaf"
	replace keep_label = 1 if mostrecent_label == "5a54f0555457a3674e423a7c"
	replace keep_label = 1 if mostrecent_label == "539fb6e9ec4f024bc1dc0587"
	replace keep_label = 1 if mostrecent_label == "587e927e5457a31645316ce6"

	// SDGE
	replace keep_label = 1 if mostrecent_label == "539fc493ec4f024c27d8c8bf"
	replace keep_label = 1 if mostrecent_label == "539f7192ec4f024411ece727"
	replace keep_label = 1 if mostrecent_label == "539fc2e7ec4f024c27d8b6c5"
	replace keep_label = 1 if mostrecent_label == "539fbeb3ec4f024c27d8891d"
	
	// SCE
	replace keep_label = 1 if mostrecent_label == "539fb9a4ec4f024bc1dc2495"
	replace keep_label = 1 if mostrecent_label == "539f74f5ec4f024411ed0ead"
	replace keep_label = 1 if mostrecent_label == "539f7193ec4f024411ece7b3"
	replace keep_label = 1 if mostrecent_label == "539f702fec4f024411ecd73b"
	replace keep_label = 1 if mostrecent_label == "539f70c4ec4f024411ecde75"
	replace keep_label = 1 if mostrecent_label == "539f6a8fec4f024411ec914d"
	replace keep_label = 1 if mostrecent_label == "539f6a23ec4f024411ec8bad"
	replace keep_label = 1 if mostrecent_label == "539f7172ec4f024411ece65b"
	replace keep_label = 1 if mostrecent_label == "539fbf45ec4f024c27d88f8f"
	
	// Truckee
	replace keep_label = 1 if mostrecent_label == "539fc355ec4f024c27d8bac9"
	// Turlock
	replace keep_label = 1 if mostrecent_label == "539f6bdcec4f024411eca235"
	



	
	// combine label IDs in cases when the IDs arent mapped to each other but should be (from visual inspection)
	replace mostrecent_label = "5405c21b5257a32f22af34b3" if mostrecent_label == "539f6e4bec4f024411ecc073" 
	

	replace mostrecent_label = "573a3bfd5457a3a673f19c7a" if mostrecent_label == "539fb848ec4f024bc1dc15b9" 
	replace mostrecent_label = "573a3b5c5457a37859f19c7a" if mostrecent_label == "539f7506ec4f024411ed0f81" 
	
	//PGE
	replace mostrecent_label = "586eaa895457a3131d1c9605" if mostrecent_label == "539f7337ec4f024411ecf9d9"
	replace mostrecent_label = "574c955f5457a367445e629e" if mostrecent_label == "539f6b25ec4f024411ec98c9" 
	
	replace mostrecent_label = "539f721bec4f024411eced9d" if mostrecent_label == "539f6d3cec4f024411ecb385" // 
	replace mostrecent_label = "574c96455457a3c01f5e62a2" if mostrecent_label == "539fcbadec4f024d2f5407fe" // 574c96455457a3c01f5e62a2
	replace mostrecent_label = "574c96a45457a30e465e629f" if mostrecent_label == "539fbd5aec4f024c27d87ac1" // 574c96a45457a30e465e629f
	replace mostrecent_label = "574c97215457a3c01f5e62a3" if mostrecent_label == "539f6bdcec4f024411eca249" // 574c97215457a3c01f5e62a3
	replace mostrecent_label = "574c97955457a34a4b5e629d" if mostrecent_label == "539f6b53ec4f024411ec9b6b" // 574c97955457a34a4b5e629d
	replace mostrecent_label = "574c98225457a3874b5e629d" if mostrecent_label == "539fb824ec4f024bc1dc13eb" // 574c98225457a3874b5e629d
	replace mostrecent_label = "574c988c5457a3c01f5e62a4" if mostrecent_label == "539fb4adec4f024bc1dbebf3" // 574c988c5457a3c01f5e62a4
	replace mostrecent_label = "574c98f25457a367445e629f" if mostrecent_label == "539f6d4cec4f024411ecb45b" // 574c98f25457a367445e629f
	
	replace mostrecent_label = "586eaa895457a3131d1c9605" if mostrecent_label == "5786e1135457a33c6529b8a1" // 586eaa895457a3131d1c9605
	 
	replace mostrecent_label = "586eaa895457a3131d1c9605" if mostrecent_label == "5550dad15457a3267f8b4568" // 586eaa895457a3131d1c9605
	
	keep if keep_label == 1
	drop keep_label
	
	// issues with rates overlap because of these addtional ID matches
	* start with encoding (again) the date variables, then drop old rates.
	foreach var in startdate enddate lastupdate {
		ren `var' `var'_t
		gen `var'_t1 = clock(`var'_t, "MDYhm")
		format `var'_t1 %tc
		gen `var' = dofc(`var'_t1)
		format `var' %td
		drop `var'_t `var'_t1
	}
	gsort mostrecent_label year month -startdate
	by mostrecent_label year month: keep if _n == 1
	drop pastlastupdate
	bys mostrecent_label: egen startdate_newlabel = min(startdate) if !mi(startdate)
	bys mostrecent_label: egen enddate_newlabel = max(enddate) if !mi(enddate)
	tostring year, gen(temp_y)
	tostring month, gen(temp_m)
	gen strref = temp_y + "/" + temp_m + "/15"
	gen refdate = date(strref, "YMD")
	format refdate %td
	drop strref temp_*
	gen pastlastupdate = (enddate_newlabel == .) & (refdate >  lastupdate)
	drop refdate
	// residual cleaning and labeling
	drop TVar* supersedes order
	gen max_rate_daily =  ERSWkdayMax_t0M <100 if !mi(ERSWkdayMax_t0M)
	forvalues i = 0/3 {
		replace ERSWkdayMax_t`i'M = ERSWkdayMax_t`i'M*30 if max_rate_daily == 1
		replace ERSWkdayMax_t`i'M = . if ERSWkdayMax_t`i'M == 0
	}
	drop max_rate_daily
	gen HighestTierAvgRate = ERSWkdayAvgRate_t0M
	la var HighestTierAvgRate "Rate at highest tier (weekday)"
	gen MaxTierUsageAmt =  ERSWkdayMax_t0M
	la var MaxTierUsageAmt "Monthly usage to get to highest tier"
	forvalues i = 1/4 {
		replace HighestTierAvgRate = ERSWkdayAvgRate_t`i'M if !mi(ERSWkdayAvgRate_t`i'M)
		replace MaxTierUsageAmt = ERSWkdayMax_t`i'M if !mi(ERSWkdayMax_t`i'M)
	}
	forvalues i = 0/4 {
		ren ERSWkdayAvgRate_t`i'M AvgRateTier`i'
		la var AvgRateTier`i' "Weekday rate: tier `i'"
		ren ERSWkdayMax_t`i'M Tier`i'UsageAmt
		la var Tier`i'UsageAmt "Monthly usage to go past tier `i'"
	}
	order eiaid label startdate enddate mostrecent_label year month pastlastupdate 
	sort eiaid mostrecent_label year month 
	la var eiaid "EIA ID (utility)"
	la var label "Rate ID label, Open EI"
	la var mostrecent_label "Time consistent rate ID label, Open EI"
	la var pastlastupdate "rate info past last update (Open EI)"
	la var startdate "timing info at the label level"
	la var enddate "timing info at the label level"

	tempfile flatrate_cleaned
	save `flatrate_cleaned', replace	
}
if $section2_manual == 1 {
	import excel "${ElectricityRates}/tiered_residential_rates_compiled.xlsx", clear firstrow
	gen tempid = _n
	
	// adjust rates
	forvalues i = 0/4 {
		replace ERSWkdayAvgRate_t`i' = ERSWkdayAvgRate_t`i' + adjustment if !mi(adjustment)
	}
	drop adjustment
	tempfile merge_data
	save `merge_data', replace
	
	forvalues i = 2014/2018 {
		gen a_`i' = `i'
	}
	forvalues i = 1/12 {
		gen b_`i' = `i'
	}
	keep tempid a_* b_*
	reshape long b_, i(tempid a_2014-a_2018) j(month)
	drop b_
	reshape long a_, i(tempid month) j(year)
	drop a_
	merge m:1 tempid using `merge_data'
	assert _merge == 3
	drop _merge
	// ref date (15th of month) as cutoff.
	tostring year, gen(year_s)
	tostring month, gen(month_s)
	gen date_string_t = year_s + "/" + month_s + "/15"
	gen date_ref = date(date_string_t, "YMD")
	drop date_string_t year_s month_s
	format date_ref %td
	drop if date_ref < startdate | date_ref > enddate
	sort tempid utility year month
	isid tempid year month
	drop tempid date_ref

	gen mostrecent_label = "Manually collected: " +  utility
	gen manually_collected = 1
	
	// renaming vars and creating aggregate max threshold/rate vars
	gen HighestTierAvgRate = ERSWkdayAvgRate_t0M
	la var HighestTierAvgRate "Rate at highest tier (weekday)"
	gen MaxTierUsageAmt =  ERSWkdayAvg_t0max
	la var MaxTierUsageAmt "Monthly usage to get to highest tier"
	replace MaxTierUsageAmt = MaxTierUsageAmt*30 if max_unit == "day"
	drop max_unit
	forvalues i = 1/4 {
		replace HighestTierAvgRate = ERSWkdayAvgRate_t`i'M if !mi(ERSWkdayAvgRate_t`i'M)
		cap replace MaxTierUsageAmt = ERSWkdayAvg_t`i'max if !mi(ERSWkdayAvg_t`i'max) // cap b.c. there is no max rate at four
	}
	forvalues i = 0/4 {
		ren ERSWkdayAvgRate_t`i'M AvgRateTier`i'
		la var AvgRateTier`i' "Weekday rate: tier `i'"
		cap ren ERSWkdayAvg_t`i'max Tier`i'UsageAmt
		cap la var Tier`i'UsageAmt "Monthly usage to go past tier `i'"
	}
	tempfile manually_collected
	save `manually_collected', replace

}
if $section2_manual_IOUs == 1 {
	foreach IOU in SDGE SCE {
		import excel "${ElectricityRates}/`IOU'_rate_schedules_manually_collected.xlsx", clear firstrow sheet("`IOU'_rates")
		gen tempid = _n
	
		// adjust rates
		forvalues i = 0/4 {
			replace ERSWkdayAvgRate_t`i' = ERSWkdayAvgRate_t`i' + adjustment if !mi(adjustment)
		}
		drop adjustment
		tempfile merge_data
		save `merge_data', replace
		
		forvalues i = 2014/2017 {
			gen a_`i' = `i'
		}
		forvalues i = 1/12 {
			gen b_`i' = `i'
		}
		keep tempid a_* b_*
		reshape long b_, i(tempid a_2014-a_2017) j(month)
		drop b_
		reshape long a_, i(tempid month) j(year)
		drop a_
		merge m:1 tempid using `merge_data'
		assert _merge == 3
		drop _merge
		// ref date (15th of month) as cutoff.
		tostring year, gen(year_s)
		tostring month, gen(month_s)
		gen date_string_t = year_s + "/" + month_s + "/15"
		gen date_ref = date(date_string_t, "YMD")
		drop date_string_t year_s month_s
		format date_ref %td
		drop if date_ref < startdate | date_ref > enddate
		sort tempid utility year month
		isid tempid year month
		drop tempid date_ref
		
		gen mostrecent_label = "Manually collected: " +  utility
		gen manually_collected = 1
		
		// renaming vars and creating aggregate max threshold/rate vars
		gen HighestTierAvgRate = ERSWkdayAvgRate_t0M
		la var HighestTierAvgRate "Rate at highest tier (weekday)"
		gen MaxTierUsageAmt =  ERSWkdayAvg_t0max
		la var MaxTierUsageAmt "Monthly usage to get to highest tier"
		drop max_unit
		forvalues i = 1/4 {
			replace HighestTierAvgRate = ERSWkdayAvgRate_t`i'M if !mi(ERSWkdayAvgRate_t`i'M)
			cap replace MaxTierUsageAmt = ERSWkdayAvg_t`i'max if !mi(ERSWkdayAvg_t`i'max) // cap b.c. there is no max rate at four
		}
		forvalues i = 0/4 {
			ren ERSWkdayAvgRate_t`i'M AvgRateTier`i'
			la var AvgRateTier`i' "Weekday rate: tier `i'"
			cap ren ERSWkdayAvg_t`i'max Tier`i'UsageAmt
			cap la var Tier`i'UsageAmt "Monthly usage to go past tier `i'"
		}
		
		// Shotaro: this is temporary and needs to be fixed (SCE server is down)
		if "`IOU'" == "SCE" {
			drop Tier?UsageAmt 
		}
		tempfile manually_collected_`IOU'
		save `manually_collected_`IOU'', replace
		
	}
	
	
	// baseline allowance rate by region (climate zone) for SDGE
	import excel "${ElectricityRates}/SDGE_rate_schedules_manually_collected.xlsx", clear firstrow sheet("SDGE_BasicAllowance") cellrange(A1:F13)
	gen tempid = _n
	
	tempfile merge_data
	save `merge_data', replace
	
	forvalues i = 2014/2017 {
		gen a_`i' = `i'
	}
	forvalues i = 1/12 {
		gen b_`i' = `i'
	}
	keep tempid a_* b_*
	reshape long b_, i(tempid a_2014-a_2017) j(month)
	drop b_
	reshape long a_, i(tempid month) j(year)
	drop a_
	merge m:1 tempid using `merge_data'
	assert _merge == 3
	drop _merge
	
	
	// ref date (15th of month) as cutoff.
	tostring year, gen(year_s)
	tostring month, gen(month_s)
	gen date_string_t = year_s + "/" + month_s + "/15"
	gen date_ref = date(date_string_t, "YMD")
	drop date_string_t year_s month_s
	format date_ref %td
	drop if date_ref < ValidFrom | date_ref > ValidTo
	sort tempid year month
	isid tempid year month
	drop tempid date_ref ValidFrom ValidTo
	
	merge 1:1 year month using `manually_collected_SDGE'
	
	foreach region in Coastal Inland Mountain Desert {
		forvalues i = 0/3 {
			gen Tier`i'UsageAmt_`region' = Tier`i'UsageAmt*`region'*30 // assume that 1 month=30days
		}
		drop `region'
	}
	drop Tier?UsageAmt
	drop startdate enddate
	// save over the tempfile
	save `manually_collected_SDGE', replace

}




if $Data_Electric_section2_EVrate == 1 {
	use `sec2_prelim', clear
	keep if EV == 1
	drop if label == "539f7358ec4f024411ecfb5d" 
	// So construct min, max, mean daily rates for each tier. 
	** first create a panel data to merge to.

	preserve
		keep label eia startdate enddate lastupdate
		
		forvalues m = 1/12 {
			gen mvar`m' = 1
		}
		forvalues y = 2014/2018 {
			gen yvar`y' =  1
		}
		reshape long mvar, i(label eia startdate enddate yvar*) j(month)
		drop mvar
		reshape long yvar, i(label eia startdate enddate month) j(year)
		drop yvar
		sort label eia startdate enddate lastupdate year month
		order label eia startdate enddate lastupdate year month
		tostring year, replace
		tostring month, replace
		gen refdatestr =  month + "/" + "15" + "/" + year 
		gen refdate = date(refdatestr, "MDY")
		format refdate %td
		foreach var in startdate enddate lastupdate {
			ren `var' `var'_t
			gen `var'_t1 = clock(`var'_t, "MDYhm")
			format `var'_t1 %tc
			gen `var' = dofc(`var'_t1)
			format `var' %td
		}
		// trim panel to applicable ranges.
		drop if refdate < startdate & startdate !=. // too early
		drop if refdate > enddate & enddate != . // too late
		gen pastlastupdate = (enddate == .) & (refdate >  lastupdate)
		keep eiaid label year month pastlastupdate
		destring year, replace
		destring month, replace

		tempfile paneltemplate_EV
		save `paneltemplate_EV', replace
	restore	
	
	
	// create daily average rates
	** create variables indicating the shares of each period indexes within the day
	forvalues m = 1/12 { // month 
		forvalues i = 0/11 { 
			egen ShERSWkdayM`m'_p`i' = anycount(ERSWkdaySchH*M`m'), values(`i')
			replace ShERSWkdayM`m'_p`i' = ShERSWkdayM`m'_p`i' /24
		}
		egen ShERSWkDayM`m'_pSum  = rowtotal(ShERSWkdayM`m'_p*)
		replace ShERSWkDayM`m'_pSum  = round(ShERSWkDayM`m'_pSum)
		assert ShERSWkDayM`m'_pSum == 1
		drop ShERSWkDayM`m'_pSum
	}
	
	// add adjustment rates 
		forvalues k = 0/4 { // tiers within each period index
			forvalues i = 0/5 { // periods zero to five have four tiers. rest only have one.
				replace ERS_p`i'_t`k'rate = ERS_p`i'_t`k'rate  + ERS_p`i'_t`k'adj if !mi(ERS_p`i'_t`k'adj)
			}
		}

	// create daily average rate by each tier for each month.
	// m is month, k is period (there are different tier rates within each period assignment)
	forvalues m = 1/12 {
		forvalues k = 0/4 { // tiers within each period index
			forvalues i = 0/5 { // periods zero to five have four tiers. rest only have one.
				gen temprate_m`m'_p`i'_t`k' = ERS_p`i'_t`k'rate * ShERSWkdayM`m'_p`i'
			} // end period index loop
			if `k' == 0 {
				forvalues i = 6/11 {
					replace ERS_p`i'_t`k'rate = ERS_p`i'_t`k'rate  + ERS_p`i'_t`k'adj if !mi(ERS_p`i'_t`k'adj)
					gen temprate_m`m'_p`i'_t`k' = ERS_p`i'_t`k'rate * ShERSWkdayM`m'_p`i'
				} // end period index loop
			}
			egen WkdyAvgEVRateT`k'M`m' = rowtotal(temprate_m`m'_p*_t`k')
			replace WkdyAvgEVRateT`k'M`m' = . if WkdyAvgEVRateT`k'M`m' == 0
			drop temprate_m`m'_p?_t`k'
		} // end tier loop
	} // end month loop
	
	
	
	// create daily (monthly, really) min (off-peak), max (peak), and other (shoulder) rates.
	forvalues m = 1/12  {
		forvalues k = 0/4 {
			forvalues i = 0/4 {
				gen EVRateP`i'Tier`k'M`m' = ERS_p`i'_t`k'rate if ShERSWkdayM`m'_p`i' >0 & EV == 1
			}
			if `k' == 0 | `k' == 1 {
				forvalues i = 5/5 {
					gen EVRateP`i'Tier`k'M`m' = ERS_p`i'_t`k'rate if ShERSWkdayM`m'_p`i' >0 & EV == 1
				}
			}
			if `k' == 0 {
				forvalues i = 6/11 {
					gen EVRateP`i'Tier`k'M`m' = ERS_p`i'_t`k'rate if ShERSWkdayM`m'_p`i' >0 & EV == 1
				}
			}
		}
	}

	drop ShERSWkdayM*
	
	keep label eia name utility startdate enddate supersedes lastupdate WkdyAvgEVRateT?M* EVRateP* 
	sort label eia name utility startdate enddate supersedes lastupdate WkdyAvgEVRateT?M* EVRateP0* EVRateP1*   
	duplicates drop
	
	duplicates tag supersedes, gen(tag)
		br if tag == 1
	drop tag
	
	preserve
		keep WkdyAvgEVRateT* label utility eia name lastupdate startdate enddate supersedes
		reshape long WkdyAvgEVRateT0M WkdyAvgEVRateT1M WkdyAvgEVRateT2M WkdyAvgEVRateT3M WkdyAvgEVRateT4M, ///
			i(label utility eia name lastupdate startdate enddate supersedes) j(month)
		tempfile EVRateAvg
		save `EVRateAvg', replace
	restore
	forvalues i = 0/11 {
		preserve
			keep EVRateP`i'Tier* label utility eia name lastupdate startdate enddate supersedes
			reshape long EVRateP`i'Tier0M EVRateP`i'Tier1M EVRateP`i'Tier2M EVRateP`i'Tier3M EVRateP`i'Tier4M, ///
				i(label utility eia name lastupdate startdate enddate supersedes) j(month)
			tempfile EVRateAvgP`i'
			save `EVRateAvgP`i'', replace
		restore
	}
	use `EVRateAvg', clear
	
	forvalues i = 0/11 {
		merge 1:1 label month using `EVRateAvgP`i''
		assert _merge == 3
		drop _merge
	}
	
	// rename
	forvalues k = 0/4 { // periods
		forvalues i = 0/11 { // tiers
			cap ren EVRateP`i'Tier`k'M EVRateTier`k'P`i'
		}
		ren WkdyAvgEVRateT`k'M WkdyAvgEVRateT`k'
	}
	
	//sort rates, put them in order of price
	reshape long EVRateTier0P EVRateTier1P EVRateTier2P EVRateTier3P EVRateTier4P, i(label utility eiaid name lastupdate startdate enddate supersedes month) j(period)
	isid eiaid label month period
	drop if EVRateTier0P == .
	sort eiaid label month EVRateTier0P
	bys eiaid label month: gen Rate = _n
	replace Rate = Rate - 1 // set with the convention of this data set, starting index from 0.
	bys eiaid label month: gen MaxRate = _N
	replace MaxRate = MaxRate - 1
	drop period
	reshape wide EVRateTier0P EVRateTier1P EVRateTier2P EVRateTier3P EVRateTier4P, i(label utility eiaid name lastupdate startdate enddate supersedes month) j(Rate)
	order WkdyAvgEVRateT0 WkdyAvgEVRateT1 WkdyAvgEVRateT2 WkdyAvgEVRateT3 WkdyAvgEVRateT4, after(month)
	// create peak and off peak rates
	forvalues k = 0/4 {
		ren EVRateTier`k'P0 EVRateOffPeakTier`k'
		gen EVRatePeakTier`k' = EVRateTier`k'P2
		replace EVRatePeakTier`k' = EVRateTier`k'P1 if EVRatePeakTier`k' == . // replace with the 2nd period rates (indexed at P = 2-1 = 1)
		replace EVRatePeakTier`k'  = EVRateOffPeakTier`k' if EVRatePeakTier`k' == . // set peak rate to the first rate, i.e. equal to off peak
		gen EVRateShoulderTier`k' = EVRateTier`k'P1  if EVRatePeakTier`k' !=  EVRateTier`k'P1
	}
	
	drop EVRateTier0P1-MaxRate
	
	merge 1:m label month using `paneltemplate_EV'
	
	tab _merge 
	
	keep if _merge == 3
	drop _merge
	sort eiaid label year month
	order eiaid label year month
	duplicates tag supersedes year month if supersedes != "", gen(tag2)
	assert (tag2 == 0 | tag == .)
	drop tag2
	
	// save the cleaned files so far
	tempfile saved1
	save `saved1', replace
		
		
	// keep which obs to keep for each EIAID.
	
	/*Run a series of merge and delete commands to create a wide  dataset 
	where i is a new identifier for the string of labels, j is the order of 
	those strings chronologically. Start with the most recent, then work back in time.
	*/
	keep label supersedes
	duplicates drop
	
	ren supersedes label0
	ren label label1
	
	preserve
		keep if label0 != ""
		ren label1 label2
		ren label0 label1
		tempfile supersedes_a
		save `supersedes_a', replace
	restore
	merge 1:1 label1 using `supersedes_a'
	/* _merge results:
	1. means the label is not followed by another label.
	2. means the supersedes variable value was not in our rate sheet
	3. means the supersedes variable value was in our rate sheet.
	*/
	order label0 label1 label2
	
	sort _merge label0 label1 label2
	gen label_unused = label1 != "" & _merge == 2
	// omit obs's with non-existing superseding labels from the list of strings to be merged onto again.
	// if the subset of observations with non missing supersedes variable didnt match, that means the superseding label does not exist in data.
	// So we create indicators for those superseding labels whose rate we dont have in data.
	assert _merge != 2

	// create indicators for values dropped
	* drop observations whose preceding label was matched to the existing list of labels. 
	gen drop_end = 0 // drop end of a string that's matched elsewhere but ends with the label1 value. _merge == 1
	levelsof label1 if _merge == 3, local(match_b)
	foreach num of local match_b {
		replace drop_end = 1 if label0 == "`num'" & _merge == 1
	}	
	drop if drop_end == 1
	
	// drop obs whose superseding labels did not match. the series starting with these labels are flagged elsewhere, i.e. these are duplicate observations
	drop if _merge == 2
	drop drop_end label_unused
	
	// create indicators for whether the beginning and the end of the series are identified for each row
	// The objective is to set these dummies to one for all obs.
	gen begin_series = 0
	*replace begin_series = 1 if initialstring == 1
	replace begin_series = 1 if label0 == "" // this is essentially is if label0 == 0, no superseding label, so the begging of a series
	
	gen end_series = label2 == "" // i.e. not matched  in the initial merge
	* drop _merge
	order begin_series end_series label0 label1 label2

	preserve
		keep if _merge == 3 & begin_series == 0
		ren label2 label3
		ren label1 label2
		ren label0 label1
		keep label1 label2 label3
		tempfile supersedes_b
		save `supersedes_b', replace
	restore
	// drop the series obs that we are now merging
	drop if _merge == 3 & begin_series == 0
	merge m:1 label1 label2 using `supersedes_b', gen(_merge2)
	order begin_series end_series label0 label1 label2 label3 _merge _merge2

	sort _merge _merge2 begin_series end_series label0 label1 label2 label3
	assert end_series == 0 if _merge == 3 & _merge2 == 3
	replace end_series = 1 if _merge == 3 & _merge2 == 1

		
	// iterate
	preserve
		keep if _merge2 == 2
		ren label3 label4
		ren label2 label3
		ren label1 label2
		keep label2 label3 label4
		tempfile supersedes_c
		save `supersedes_c', replace
	restore
	drop if _merge2 == 2
	merge m:1 label2 label3 using `supersedes_c', gen(_merge3)

	order begin_series end_series label0 label1 label2 label3 label4 _merge _merge2 _merge3

	sort _merge _merge2 _merge3 begin_series end_series label0 label1 label2 label3 label4
	
	assert end_series == 0 if _merge2 == 3 & _merge3 == 3
	replace end_series = 1 if _merge2 == 3 & _merge3 == 1
	
	// the end of a series of merging
	assert _merge3 != 2
	
	assert label0 == ""
	gen mostrecent_label = label1
	reshape long label, i(mostrecent_label) j(order)
	drop if label == ""
	keep label mostrecent_label order
	tempfile mergedlabels
	save `mergedlabels', replace
	
	// merge the merged label series.
	use `saved1', clear
	merge m:1 label using `mergedlabels'
	tab _merge
	drop _merge
	
	
	// finally, collapse date down to the utility-year-month level
	ren pastlastupdate lastlastupdate_EV
	collapse WkdyAvgEVRateT? EVRateOffPeakTier? lastlastupdate_EV EVRatePeakTier? EVRateShoulderTier?, by(year month utility eiaid)
	
	tempfile EVrates_clean
	save `EVrates_clean', replace
}

// plot out data to see what's missing
if $Data_Electric_section3 == 1 {
	use `flatrate_cleaned', clear
	append using `manually_collected'
	gen modate = ym(year, month) 
	format modate %tm
	encode mostrecent_label, gen(rate_id_combined)
	tsset rate_id_combined modate
	save "${Data_Clean}/Electricity_rate_data_panel_uncollapsed.dta", replace

	// plot rate graphs
	
	levelsof utility, local(utility_list)
	
	drop if manually_collected == 1 // then we merge it back on later
	
	// Now we turn this data set down to the monthyear-utility level
	// we want to disaggregate PGE rates by region
	
	gen region = name if eiaid == 14328 
	replace region = subinstr(region, "E-1 Baseline Region ", "", .)
	replace region = trim(subinstr(region, "E-1 Residential Service Baseline Region ", "", .))
	replace region = "0" if mi(region) // this is for other service areas.
	// "reshaping" to allow for multiple rate schedules per 
	levelsof region, local(region)
	foreach reg of local region {
		foreach var of varlist fixedchargefirstmeter AvgRateTier? Tier?UsageAmt HighestTierAvgRate MaxTierUsageAmt pastlastupdate {
			gen `var'_`reg'temp = `var' if region == "`reg'"
			bys eiaid year month: egen `var'_`reg' = mode(`var'_`reg'temp)
			drop `var'_`reg'temp
		}
	}
	collapse (max) AvgRateTier* Tier?UsageAmt* MaxTierUsageAmt* HighestTierAvgRate* pastlastupdate*, by(year month utility eiaid)
	append using `manually_collected' // merge manual data back on
	append using `manually_collected_SDGE', force
	append using `manually_collected_SCE', force

	// add eiaid to obs that are missing it (from manual data)
	bys utility: egen eiaid_mean = mode(eiaid)
	replace eiaid = eiaid_mean
	drop eiaid_mean
	// take obs from manual if there's more than one ob per month-year
	bys year month utility: gen N = _N
	tab N manually_collected, mi
	drop if N == 2 & manually_collected != 1  
	drop N
	
	// merge EV rate data
	merge 1:1 utility year month using `EVrates_clean', gen(_merge_EV)
	// keep all merge results.(b/c some months only have EV, others only tier, others both)
	drop _merge_EV
	// add eiaid to obs that are missing it (from manual data)
	bys utility: egen eiaid_mean = mode(eiaid)
	replace eiaid = eiaid_mean
	drop eiaid_mean
	isid eiaid year month
	gen modate = ym(year, month) // we need this for tsset later. 
	order modate, after(month)
	la var modate "Year month"
	
	forvalues i = 0/4 {
		la var AvgRateTier`i' "Weekday average standard domestic rate at tier `i'"
		la var WkdyAvgEVRateT`i' "Average EV rate at tier `i'"
		la var EVRateOffPeakTier`i' "Off peak EV rate at tier `i'"
		la var EVRatePeakTier`i' "Peak EV rate at tier `i'"
		la var EVRateShoulderTier`i' "Shoulder EV rate at tier `i'"
	}
	save "${Data_Clean}/Electricity_rate_data_panel_collapsed.dta", replace
}

// graph etc.
if $Data_Electric_section4 == 1 {	
	
	use "${Data_Clean}/Electricity_rate_data_panel_collapsed.dta", clear
	// tsset
	format modate %tmMonYY
	encode utility, gen(utility_enc)
	tsset utility_enc modate
	
	preserve
		import excel using  "${ElectricityRates}/utility_names_shorthand.xlsx", clear firstrow
		tempfile shorthand
		save `shorthand', replace
	restore
	merge m:1 eiaid using `shorthand', nogen
	ren utility_shorthand utility_shorthand_str
	gen IOU = inlist(utility_shorthand_str, "PGE", "SDGE", "SCE") 
	sencode utility_shorthand_str, gen(utility_shorthand) gsort(-IOU)
	foreach var in bay_area sd la sac {
		if "`var'" == "bay_area" local area = "Bay Area"
		if "`var'" == "sd" local area = "San Diego" 
		if "`var'" == "la" local area = "Los Angeles" 
		if "`var'" == "sac" local area = "Sacramento"
		
		if "`var'" == "sd" {
			local linethickop = "plot1(lwidth(thick)) plot2(lwidth(thick))"
		}
		if "`var'" != "sd" {
			local linethickop = "plot1(lwidth(thick))"

		}
				

		local dollarsign = char(36)
		xtline HighestTierAvgRate if `var' == 1, overlay i(utility_shorthand) t(modate) ///
		ytitle("`dollarsign' USD/kWh") legend(cols(4)) scheme(slycidas1) yscale(r(0 0.5)) ylabel(#6) ///
		plotregion(margin(b = 0)) xtitle("") ///
		`linethickop'
		
		graph export "${ResultsOut}/TierHighest_`var'.png",  as(png) replace
		
		
		graph close 
	}
	
	
	tab utility year

	tab utility year if pastlastupdate < 1 

}
